import os
import time as time_sleep

import requests
from selenium import webdriver
import csv

from selenium.common import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


# 定义一个爬虫类
class SingleArticleSpider(object):
    """Scrape a single juejin.cn article page with headless Chrome (Selenium)
    and append the extracted fields to ``csv_collect/<threed_mark>.csv``.

    Typical use::

        spider = SingleArticleSpider()
        spider.run(url, threed_mark)
    """

    # CSV header row; must stay in sync with the data row built in sp_article().
    CSV_FIELDS = [
        "url",
        "title",
        "time",
        "watchs",
        "readDuration",
        "column",
        "likes",
        "comments",
        "stars",
        "author",
        "author_url",
        "author_lever",
        "author_articles",
        "author_watchs",
        "author_fans",
        "category",
        "topic",
    ]

    def __init__(self):
        # Created lazily in run(); stays None until a scrape is started.
        self.driver = None

        # Proxy rotation hook, kept for reference (see get_proxy below):
        # proxy = self.get_proxy().get("proxy")  # fetch from ProxyPool
        # format_proxy = "http://{}".format(proxy)
        # print("thread: " + str(threed_mark) + " proxy for this run: " + format_proxy)
        # chrome_options.add_argument(f"--proxy-server={format_proxy}")

    # Main entry point.
    def run(self, url="https://juejin.cn/post/7296384298902929417", threed_mark="deep_articles"):
        """Open *url* in a headless Chrome instance, scrape it, and always
        shut the browser down afterwards.

        :param url: article page to scrape.
        :param threed_mark: worker tag; selects the output CSV file name.
        """
        # Most of the tuning happens via Chrome options.
        chrome_options = Options()
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--disable-extensions")
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--disable-software-rasterizer")
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--allow-running-insecure-content')
        # Skip image loading: faster page loads, we only read text/attributes.
        chrome_options.add_argument("blink-settings=imagesEnabled=false")
        # Reuse the normal user profile so the logged-in session is kept.
        chrome_options.add_argument('--user-data-dir=C:\\Users\\26480\\AppData\\Local\\Google\\Chrome\\User Data')

        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.get(url)
            self.driver.implicitly_wait(2)  # seconds

            # Scrape the article page and persist the results.
            self.sp_article(url, threed_mark)
        finally:
            # quit() (unlike close()) also terminates the chromedriver
            # process, and the finally guarantees cleanup on scrape errors.
            self.driver.quit()

    def _find_text(self, selector):
        """Return the .text of the element at *selector*, or "NOTFOUND"."""
        try:
            return self.driver.find_element(By.CSS_SELECTOR, selector).text
        except NoSuchElementException:
            return "NOTFOUND"

    def _find_attr(self, selector, attribute):
        """Return *attribute* of the element at *selector*, or "NOTFOUND"."""
        try:
            return self.driver.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
        except NoSuchElementException:
            return "NOTFOUND"

    # Parse the article page and write one CSV row.
    def sp_article(self, pass_url, pass_threed_mark):
        """Extract article + author metadata from the currently loaded page
        and append it to ``csv_collect/<pass_threed_mark>.csv`` (writing the
        header row first if the file is new).

        Every missing element degrades to the sentinel string "NOTFOUND".
        """
        title = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > article > h1')

        time_sleep.sleep(2)  # give the dynamic counters a moment to load

        publish_time = self._find_attr(
            '#juejin > div.view-container > main > div > div.main-area.article-area > article > div.author-info-block > div.author-info-box > div.meta-box > time',
            "datetime")
        watchs = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > article > div.author-info-block > div.author-info-box > div.meta-box > span.views-count')
        read_duration = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > article > div.author-info-block > div.author-info-box > div.meta-box > span.read-time')
        column = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > article > div.author-info-block > div.author-info-box > div.meta-box > div > div.title')

        # Like/comment/star counts live in the floating side panel badges.
        likes = self._find_attr(
            '#juejin > div.view-container > main > div > div.article-suspended-panel.dynamic-data-ready > div:nth-child(2)',
            "badge")
        comments = self._find_attr(
            '#juejin > div.view-container > main > div > div.article-suspended-panel.dynamic-data-ready > div:nth-child(3)',
            "badge")
        stars = self._find_attr(
            '#juejin > div.view-container > main > div > div.article-suspended-panel.dynamic-data-ready > div:nth-child(4)',
            "badge")

        # Author card in the sidebar.
        author = self._find_text(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > a > div.info-box > span > span.name')
        author_url = self._find_attr(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > a',
            "href")
        author_lever = self._find_attr(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > a > div.info-box > span > span.rank > img',
            "title")
        author_articles = self._find_text(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > div.count-container > a:nth-child(1) > div.count')
        author_watchs = self._find_text(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > div.count-container > a:nth-child(2) > div.count')
        author_fans = self._find_text(
            '#sidebar-container > div.sidebar-block.author-block.author-block-container.pure > div.count-container > a:nth-child(3) > div.count')

        # Tags at the end of the article.
        category = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > div.article-end > div > div:nth-child(1) > div.tag-list-container')
        topic = self._find_text(
            '#juejin > div.view-container > main > div > div.main-area.article-area > div.article-end > div > div.tag-list.theme-list > div.theme-list-container > a > span')

        # Persist the row; order must match CSV_FIELDS.
        csv_data = [
            pass_url,
            title,
            publish_time,
            watchs,
            read_duration,
            column,
            likes,
            comments,
            stars,
            author,
            author_url,
            author_lever,
            author_articles,
            author_watchs,
            author_fans,
            category,
            topic,
        ]

        save_url = "csv_collect/" + pass_threed_mark + ".csv"
        # Make sure the output directory exists before opening the file.
        os.makedirs(os.path.dirname(save_url), exist_ok=True)
        # A brand-new file gets the header row first.
        write_header = not os.path.exists(save_url)
        with open(save_url, "a+", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if write_header:
                writer.writerow(self.CSV_FIELDS)
            writer.writerow(csv_data)

        # Console progress message.
        print("----线程: "+str(pass_threed_mark)+"-----SingleArticleSpider----爬取文章: " + title + " 成功----------")

    def destroy(self):
        """Shut down the browser and the chromedriver process."""
        self.driver.quit()

    # Fetch a proxy from the local proxy pool (disabled).
    # def get_proxy(self):
    #     return requests.get("http://127.0.0.1:5010/get/").json()

    # Remove a dead proxy from the pool (disabled).
    # def delete_proxy(self, proxy):
    #     requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))

# Script entry point: run a one-off scrape of the default article.
if __name__ == "__main__":
    spider = None
    try:
        spider = SingleArticleSpider()
        spider.run()
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing.
        print("错误:", e)
    finally:
        # Don't leak a headless Chrome / chromedriver process when the
        # scrape fails midway. Guard against partial initialization.
        if spider is not None and spider.driver is not None:
            spider.destroy()
